

***************************************************************
* SIRENE STOCK
***************************************************************
* Build the firm-year SIRENE stock panel: normalise the firm id, recode gender,
* parse the creation date, derive survival indicators and save sirene.dta.
use "${file}\stocks-ent-2000-2018.dta", clear //  Aggregated SIRENE files
rename * ,upper
rename YEAR Y

* keep only firms with valid firm id
* SIREN is converted to string and left-padded with at most three zeros;
* identifiers still shorter than 9 characters afterwards are dropped
tostring SIREN, replace
forvalues pass = 1/3 {
	replace SIREN="0"+SIREN if length(SIREN)<9
}
drop if length(SIREN)<9
* one record per firm-year (force: an arbitrary record is kept among duplicates)
duplicates drop SIREN Y, force
sort SIREN Y
order SIREN Y 

* adjust gender
replace SEXE="1" if SEXE=="M"
replace SEXE="2" if SEXE=="F"

* clean creation date
* ORI_DEMO_DATE is read as a YYYYMMDD string: characters 1-4 = year, 5-6 = month
tostring ORI_DEMO_DATE, replace
gen year=substr(ORI_DEMO_DATE,1,4)
gen month=substr(ORI_DEMO_DATE,5,2)
destring  month year, replace force
* a missing month compares as larger than 12, so records whose month cannot
* be parsed are dropped here as well
drop if month>12|month<1

// define survival
* YMAX = last year in which the firm appears in the stock files
bys SIREN: egen YMAX=max(Y) 
* NOTE(review): firms last seen in 2015 get a missing YMAX, presumably because
* 2015 was the right-censoring year; the input file name suggests data through
* 2018 - confirm that 2015 is still the intended cutoff
replace YMAX=. if YMAX==2015
* Stata treats a missing value as larger than any number, so (YMAX-year>=5)
* evaluates to true when year is missing; require both operands to be present
gen SURVIE5=(YMAX-year>=5) if !missing(YMAX, year)
tab SURVIE5
gen SURVIE3=(YMAX-year>=3) if !missing(YMAX, year)
tab SURVIE3
label var SURVIE5 "Survival sirene"
label var SURVIE3 "Survival sirene"
label var YMAX "Year of extinction sirene"
rename year YCREAT
label var YCREAT "Year creation sirene"
drop month ORI_DEMO_DATE
label var APEN "APE sirene"

keep SIREN Y APEN YMAX YCREAT SURVIE* CJ SEXE REG_SIEGE DEPCOM_SIEGE ZE_SIEGE CONJ_COLLAB ETRANGER
save "${file}sirene.dta", replace

* survival file
* Firm-level extract of sirene.dta: only the firm id, YMAX and the two
* survival indicators are loaded, and the first record of each SIREN is kept.
use SIREN YMAX SURVIE3 SURVIE5 using "${file}sirene.dta", clear
duplicates drop SIREN, force
tab SURVIE5
save "${file}survie.dta", replace

* Adjust sector for each company - make sure sector is available for every year of the panel
* Step 1: build a firm-year skeleton running from each firm's first observed
* year to its last observed year, capped at 2017.
use "${file}sirene.dta", clear
keep SIREN Y
* last year in which the firm is observed
bysort SIREN (Y): egen last_obs = max(Y)
* keep the earliest record of each firm as the starting row
by SIREN: keep if _n == 1
* 18 copies per firm, numbered consecutively from the first observed year
expand 18
bysort SIREN: gen panel_year = Y + _n - 1
* discard years beyond 2017 or beyond the firm's last observed year
drop if panel_year > 2017 | panel_year > last_obs
drop Y last_obs
rename panel_year Y
save "${file}sirene_light.dta", replace

* Step 2: attach the observed firm-years to the skeleton and fill gaps in
* sector, municipality and gender from neighbouring years of the same firm.
use "${file}sirene.dta", clear
drop CJ CONJ_COLLAB
* split the activity code by length: 4 characters vs 5 characters
gen APE1 = APEN if length(APEN) == 4 // Sirene naf rev1
gen APE2 = APEN if length(APEN) == 5 // Sirene naf rev2

* complete missing sectors with upper years sector, or years before
merge 1:1 SIREN Y using "${file}sirene_light.dta", force
order SIREN Y APE*
sort SIREN Y
* ascending pass: take the previous year's value (replace runs row by row, so
* this carries forward through consecutive gaps), then the next year's value
foreach v of varlist APE1 APE2 DEPCOM_SIEGE SEXE {
	by SIREN: replace `v' = `v'[_n-1] if `v' == ""
	by SIREN: replace `v' = `v'[_n+1] if `v' == ""
}
set more off
* descending pass: with years reversed, [_n-1] is the following year, so the
* remaining leading gaps are filled backward
gsort SIREN -Y
foreach v of varlist APE1 APE2 SEXE DEPCOM_SIEGE {
	by SIREN: replace `v' = `v'[_n-1] if `v' == ""
}
* report how many firm-years still have no sector code
count if missing(APE1)
count if missing(APE2)

keep SIREN Y APE1 APE2 DEPCOM_SIEGE SEXE YCREAT ETRANGER
save "${file}sirene_light.dta", replace


***************************************************************
* SIRENE STOCK CREATION
***************************************************************
* Clean the aggregated SIRENE creation file: split sector codes by
* nomenclature, recode gender, normalise the firm id, set Y to the creation
* year and save creation.dta.
clear all
* aggregate file sirene creation
* (one-off aggregation of the yearly creation files, kept commented out)
/*
gen a=.
save "${file}dta\creation.dta", replace
forval i=2000/2017  {
use  "${file}\source\REE_creat\creat`i'.dta", clear
rename *, upper
gen Y=`i'
di `i'
append using "${file}dta\creation.dta"
save "${file}dta\creation.dta", replace
}
*/

* the global macro needs the leading $ to be expanded; without it the path is
* taken literally as "{file}dta\creation.dta"
use "${file}dta\creation.dta", clear
rename * ,upper
* NOTE(review): the commented-out aggregation above creates Y directly; this
* rename assumes the file on disk has a YEAR variable and no Y - confirm
rename YEAR Y
sort SIREN Y
duplicates drop SIREN Y, force
* distinguish between sector rev 1 or rev 2 classification 
gen APE1=APEN if length(APEN)==4 
gen APE2=APEN if length(APEN)==5
* clean gender of the business owner
replace SEXE="1" if SEXE=="M"
replace SEXE="2" if SEXE=="F"
replace SEXE="0" if SEXE=="NA" 
* drop if firm is not at the right format
* (left-pad with at most three zeros, then drop ids still shorter than 9)
replace SIREN="0"+SIREN if length(SIREN)<9
replace SIREN="0"+SIREN if length(SIREN)<9
replace SIREN="0"+SIREN if length(SIREN)<9
drop if length(SIREN)<9
* dup numbers the records inside each SIREN-Y group (0 when unique); the
* group is then reduced to a single record
bys SIREN Y: gen dup = cond(_N==1,0,_n)
duplicates drop  SIREN Y, force 
* check date creation
replace ORI_DEMO_DATE=DATE if ORI_DEMO_DATE==.
destring ORI_DEMO_DATE, replace force
format %td ORI_DEMO_DATE ORI_DEMO_TRT_DATE
gen yq = qofd(ORI_DEMO_DATE)
format %tq yq
gen year =year(ORI_DEMO_DATE)
* a missing year compares as larger than 2019, so undated records are dropped too
drop if year>2019|year<2000
* Y becomes the creation year taken from ORI_DEMO_DATE
replace Y = year
drop year DATE ORI_DEMO_TRT_DATE
sort  Y SIREN 

* Y must be kept: the female-dominated-sector step filters and groups
* creation.dta on Y
keep SIREN Y APEN APE1 APE2 SEXE DEPCOM_SIEGE CONJ CJ ORI_DEMO_DATE
save "${file}creation.dta", replace


***************************************************************
* FEMALE-DOMINATED SECTOR - SIRENE CREATION 
***************************************************************
* Share of female-founded businesses among creations, by sector and year,
* at each of the five NAF levels; the result is one row per NAF5-year.
set more off 
use "${file}creation.dta", clear
* drop if gender missing
drop if SEXE=="0"
* sectors - merge all levels
* rev2 codes are used from 2008 onwards, rev1 codes before 2008; the
* nomenclature files supply the upper levels for each niv5 code
gen niv5 = APE2 if Y >= 2008
merge m:1 niv5 using "${file}nafniveauxrev2.dta"
drop if _merge == 2
drop _merge
replace niv5 = APE1 if Y < 2008
merge m:1 niv5 using "${file}nafniveauxrev1.dta", update
drop if _merge == 2
drop _merge
rename (niv1 niv2 niv3 niv4 niv5) (NAF1 NAF2 NAF3 NAF4 NAF5)
keep SIREN Y NAF* SEXE APE* 
* drop if missing sector at the SIC 4 level
drop if NAF4 == ""

* calculate percentages of female-founded businesses by sector at different sector levels
* for each level, from the most detailed (5) to the broadest (1):
*   _crea_n = number of records in the sector-year
*   _crea_f = number of records with SEXE=="2", spread to the whole
*             sector-year, then divided by _crea_n
forvalues lvl = 5(-1)1 {
	bysort NAF`lvl' Y: gen NAF`lvl'_crea_n = _N
	bysort NAF`lvl' Y SEXE: gen NAF`lvl'_crea_f = _N if SEXE == "2"
	* missing sorts last, so the first row of each sector-year holds the
	* female count whenever one exists
	sort NAF`lvl' Y NAF`lvl'_crea_f
	bysort NAF`lvl' Y: replace NAF`lvl'_crea_f = NAF`lvl'_crea_f[1]
	replace NAF`lvl'_crea_f = NAF`lvl'_crea_f / NAF`lvl'_crea_n
}

keep SIREN Y NAF* 
duplicates drop SIREN Y, force
destring Y, replace force
sort SIREN Y
save "${file}female_creation.dta", replace

* collapse to one row per NAF5-year; sector-years with no female record have
* a missing share, which is set to 0
use "${file}female_creation.dta", clear
duplicates drop NAF5 Y, force
drop SIREN
forvalues lvl = 1/5 {
	replace NAF`lvl'_crea_f = 0 if missing(NAF`lvl'_crea_f)
}
save "${file}female_creation.dta", replace


***************************************************************
* FEMALE-DOMINATED SECTORS - SIRENE BUSINESS OWNERS
***************************************************************
* Share of female business owners in the firm stock, by sector and year,
* at each of the five NAF levels; the result is one row per NAF5-year.
set more off
use "${file}sirene_light.dta", clear
// drop if gender missing
drop if SEXE == "." | SEXE == ""

// sectors - merge all levels
* rev2 codes are used from 2008 onwards, rev1 codes before 2008; the
* nomenclature files supply the upper levels for each niv5 code
gen niv5 = APE2 if Y >= 2008
merge m:1 niv5 using "${file}nafniveauxrev2.dta"
drop if _merge == 2
drop _merge
replace niv5 = APE1 if Y < 2008
merge m:1 niv5 using "${file}nafniveauxrev1.dta", update
drop if _merge == 2
drop _merge
rename (niv1 niv2 niv3 niv4 niv5) (NAF1 NAF2 NAF3 NAF4 NAF5)
keep SIREN Y NAF* SEXE APE* ETRANGER
drop if NAF4 == ""

* calculate percentages of female-owned businesses by sector at different sector levels
* for each level, from the most detailed (5) to the broadest (1):
*   _siren_n = number of firm-years in the sector-year
*   _siren_f = number of firm-years with SEXE=="2", spread to the whole
*              sector-year, then divided by _siren_n
forvalues lvl = 5(-1)1 {
	bysort NAF`lvl' Y: gen NAF`lvl'_siren_n = _N
	bysort NAF`lvl' Y SEXE: gen NAF`lvl'_siren_f = _N if SEXE == "2"
	* missing sorts last, so the first row of each sector-year holds the
	* female count whenever one exists
	sort NAF`lvl' Y NAF`lvl'_siren_f
	bysort NAF`lvl' Y: replace NAF`lvl'_siren_f = NAF`lvl'_siren_f[1]
	replace NAF`lvl'_siren_f = NAF`lvl'_siren_f / NAF`lvl'_siren_n
}

keep SIREN Y NAF* 
duplicates drop SIREN Y, force
destring Y, replace force
save "${file}female_sector.dta", replace
*
* collapse to one row per NAF5-year; sector-years with no female record have
* a missing share, which is set to 0
use "${file}female_sector.dta", clear
duplicates drop NAF5 Y, force
drop SIREN
forvalues lvl = 1/5 {
	replace NAF`lvl'_siren_f = 0 if missing(NAF`lvl'_siren_f)
}
save "${file}female_sector.dta", replace